Project-651

Authors

Mrudula Nimmala, Sowmya Sree Kemsaram, Shreyas Mali

# Read the transformed UFO sightings CSV into a data frame
file_path <- "ufo-sightings-transformed.csv"
ufo_dataset <- read.csv(file = file_path)


# Preview the first six rows of the dataset
head(ufo_dataset, n = 6L)
  X           Date_time date_documented Year Month Hour Season Country_Code
1 0 1949-10-10 20:30:00       4/27/2004 1949    10   20 Autumn          USA
2 1 1949-10-10 21:00:00      12/16/2005 1949    10   21 Autumn          USA
3 2 1955-10-10 17:00:00       1/21/2008 1955    10   17 Autumn          GBR
4 3 1956-10-10 21:00:00       1/17/2004 1956    10   21 Autumn          USA
5 4 1960-10-10 20:00:00       1/22/2004 1960    10   20 Autumn          USA
6 5 1961-10-10 19:00:00       4/27/2007 1961    10   19 Autumn          USA
         Country    Region       Locale latitude   longitude UFO_shape
1  United States     Texas   San Marcos 29.88306  -97.941111  Cylinder
2  United States     Texas Bexar County 29.38421  -98.581082     Light
3 United Kingdom   England      Chester 53.20000   -2.916667    Circle
4  United States     Texas         Edna 28.97833  -96.645833    Circle
5  United States    Hawaii      Kaneohe 21.41806 -157.803611     Light
6  United States Tennessee      Bristol 36.59500  -82.188889    Sphere
  length_of_encounter_seconds Encounter_Duration
1                        2700         45 minutes
2                        7200            1-2 hrs
3                          20         20 seconds
4                          20           1/2 hour
5                         900         15 minutes
6                         300          5 minutes
                                                                                                                                                 Description
1                    This event took place in early fall around 1949-50. It occurred after a Boy Scout meeting in the Baptist Church. The Baptist Church sit
2                                                            1949 Lackland AFB&#44 TX.  Lights racing across the sky &amp; making 90 degree turns on a dime.
3                                                                                                        Green/Orange circular disc over Chester&#44 England
4                 My older brother and twin sister were leaving the only Edna theater at about 9 PM&#44...we had our bikes and I took a different route home
5 AS a Marine 1st Lt. flying an FJ4B fighter/attack aircraft on a solo night exercise&#44 I was at 50&#44000&#39 in a &quot;clean&quot; aircraft (no ordinan
6                 My father is now 89 my brother 52 the girl with us now 51 myself 49 and the other fellow which worked with my father if he&#39s still livi
# Per-column summary: quartiles/mean for numeric columns, length and class
# for character columns
summary(ufo_dataset)
       X          Date_time         date_documented         Year     
 Min.   :    0   Length:80328       Length:80328       Min.   :1906  
 1st Qu.:20082   Class :character   Class :character   1st Qu.:2001  
 Median :40164   Mode  :character   Mode  :character   Median :2006  
 Mean   :40164                                         Mean   :2004  
 3rd Qu.:60245                                         3rd Qu.:2011  
 Max.   :80327                                         Max.   :2014  
     Month             Hour          Season          Country_Code      
 Min.   : 1.000   Min.   : 0.00   Length:80328       Length:80328      
 1st Qu.: 4.000   1st Qu.:10.00   Class :character   Class :character  
 Median : 7.000   Median :19.00   Mode  :character   Mode  :character  
 Mean   : 6.835   Mean   :15.53                                        
 3rd Qu.: 9.000   3rd Qu.:21.00                                        
 Max.   :12.000   Max.   :23.00                                        
   Country             Region             Locale             latitude     
 Length:80328       Length:80328       Length:80328       Min.   :-82.86  
 Class :character   Class :character   Class :character   1st Qu.: 34.13  
 Mode  :character   Mode  :character   Mode  :character   Median : 39.41  
                                                          Mean   : 38.12  
                                                          3rd Qu.: 42.79  
                                                          Max.   : 72.70  
   longitude        UFO_shape         length_of_encounter_seconds
 Min.   :-176.66   Length:80328       Min.   :       0           
 1st Qu.:-112.07   Class :character   1st Qu.:      30           
 Median : -87.90   Mode  :character   Median :     180           
 Mean   : -86.77                      Mean   :    9017           
 3rd Qu.: -78.75                      3rd Qu.:     600           
 Max.   : 178.44                      Max.   :97836000           
 Encounter_Duration Description       
 Length:80328       Length:80328      
 Class :character   Class :character  
 Mode  :character   Mode  :character  
                                      
                                      
                                      
# Compact structure listing: dimensions, column types and leading values
str(ufo_dataset)
'data.frame':   80328 obs. of  17 variables:
 $ X                          : int  0 1 2 3 4 5 6 7 8 9 ...
 $ Date_time                  : chr  "1949-10-10 20:30:00" "1949-10-10 21:00:00" "1955-10-10 17:00:00" "1956-10-10 21:00:00" ...
 $ date_documented            : chr  "4/27/2004" "12/16/2005" "1/21/2008" "1/17/2004" ...
 $ Year                       : int  1949 1949 1955 1956 1960 1961 1965 1965 1966 1966 ...
 $ Month                      : int  10 10 10 10 10 10 10 10 10 10 ...
 $ Hour                       : int  20 21 17 21 20 19 21 23 20 21 ...
 $ Season                     : chr  "Autumn" "Autumn" "Autumn" "Autumn" ...
 $ Country_Code               : chr  "USA" "USA" "GBR" "USA" ...
 $ Country                    : chr  "United States" "United States" "United Kingdom" "United States" ...
 $ Region                     : chr  "Texas" "Texas" "England" "Texas" ...
 $ Locale                     : chr  "San Marcos" "Bexar County" "Chester" "Edna" ...
 $ latitude                   : num  29.9 29.4 53.2 29 21.4 ...
 $ longitude                  : num  -97.94 -98.58 -2.92 -96.65 -157.8 ...
 $ UFO_shape                  : chr  "Cylinder" "Light" "Circle" "Circle" ...
 $ length_of_encounter_seconds: num  2700 7200 20 20 900 300 180 1200 180 120 ...
 $ Encounter_Duration         : chr  "45 minutes" "1-2 hrs" "20 seconds" "1/2 hour" ...
 $ Description                : chr  "This event took place in early fall around 1949-50. It occurred after a Boy Scout meeting in the Baptist Church"| __truncated__ "1949 Lackland AFB&#44 TX.  Lights racing across the sky &amp; making 90 degree turns on a dime." "Green/Orange circular disc over Chester&#44 England" "My older brother and twin sister were leaving the only Edna theater at about 9 PM&#44...we had our bikes and I "| __truncated__ ...
# Count, for every column, the entries that are NA or an empty string
vapply(
  ufo_dataset,
  function(column) sum(is.na(column) | column == ""),
  numeric(1)
)
                          X                   Date_time 
                          0                           0 
            date_documented                        Year 
                          0                           0 
                      Month                        Hour 
                          0                           0 
                     Season                Country_Code 
                          0                         259 
                    Country                      Region 
                        259                         566 
                     Locale                    latitude 
                        457                           0 
                  longitude                   UFO_shape 
                          0                        1930 
length_of_encounter_seconds          Encounter_Duration 
                          0                           0 
                Description 
                         15 
# Remove rows in which any column is NA or an empty string.
# complete.cases() alone is not sufficient here because it only detects NA,
# not blank strings.
# The mask is built with vectorised data-frame operators rather than apply(),
# which would coerce every row to a character vector and call an R closure
# once per row.
has_missing <- rowSums(is.na(ufo_dataset) | ufo_dataset == "") > 0
ufo_dataset <- ufo_dataset[!has_missing, ]




library(lubridate)

Attaching package: 'lubridate'
The following objects are masked from 'package:base':

    date, intersect, setdiff, union
# Parse both timestamp columns in one step:
#   Date_time       "YYYY-MM-DD HH:MM:SS" -> date-time
#   date_documented "M/D/YYYY"            -> date
ufo_dataset <- transform(
  ufo_dataset,
  Date_time = ymd_hms(Date_time),
  date_documented = mdy(date_documented)
)



# Treat the descriptive text columns as categorical variables (factors)
categorical_cols <- c(
  "Season", "Country_Code", "Country", "Region", "Locale", "UFO_shape"
)
ufo_dataset[categorical_cols] <- lapply(
  ufo_dataset[categorical_cols],
  as.factor
)

Get Area 51 coordinates and find nearby sightings.

library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(geosphere)
Warning: package 'geosphere' was built under R version 4.3.3
# Coordinates for Area 51 as c(longitude, latitude)
area51_coords <- c(-115.808, 37.233)

# Distance in miles from each (lon, lat) point to Area 51.
#   lon, lat: numeric vectors of equal length (longitude and latitude).
# Returns a numeric vector with one distance per input point.
# distGeo() accepts a two-column (longitude, latitude) matrix, so all points
# are handled in a single call instead of one call per row.
calculate_distance <- function(lon, lat) {
  meters <- distGeo(cbind(lon, lat), area51_coords)
  meters / 1609.34  # Convert meters to miles
}

# Add a distance column to the dataset
data <- ufo_dataset %>%
  mutate(Distance_to_Area51 = calculate_distance(longitude, latitude))

# Filter for sightings within 50 miles of Area 51
area51_sightings <- data %>%
  filter(Distance_to_Area51 <= 50)
library(leaflet)

# Build the HTML popup text for every sighting up front
sighting_popups <- with(area51_sightings, paste(
  "Date and Time:", Date_time, "<br>",
  "Season:", Season, "<br>",
  "Region:", Region, "<br>",
  "Locale:", Locale
))

# Interactive map: one red circle marker per sighting, centred on Area 51
sightings_map <- leaflet(data = area51_sightings)
sightings_map <- addTiles(sightings_map)
sightings_map <- addCircleMarkers(
  sightings_map,
  lng = ~longitude,
  lat = ~latitude,
  popup = sighting_popups,
  radius = 5,
  color = "red",
  fillOpacity = 0.8
)
setView(sightings_map, lng = -115.808, lat = 37.233, zoom = 8)
library(ggplot2)
library(plotly)

Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':

    last_plot
The following object is masked from 'package:stats':

    filter
The following object is masked from 'package:graphics':

    layout
# Define Area 51 coordinates as a one-row data frame so the point can be
# drawn as its own ggplot layer
area51_coords <- data.frame(longitude = -115.808, latitude = 37.233)

# 2D density of sightings (contour lines plus filled polygons) with Area 51
# marked and labelled.
# after_stat(level) replaces the `..level..` notation, which is deprecated
# since ggplot2 3.4.0.
gg_density <- ggplot(area51_sightings, aes(x = longitude, y = latitude)) +
  geom_density2d() +
  stat_density2d(aes(fill = after_stat(level)), geom = "polygon", alpha = 0.4) +
  geom_point(data = area51_coords, aes(x = longitude, y = latitude),
             color = "red", size = 3) +  # Mark Area 51
  annotate("text", x = -115.808, y = 37.233, label = "Area 51",
           color = "red", size = 5, hjust = 0, vjust = -1) +  # Label Area 51
  labs(title = "Density of UFO Sightings Near Area 51",
       x = "Longitude", y = "Latitude") +
  # Scale limits: observations outside this window are dropped before the
  # density is estimated
  xlim(-116, -115.5) +
  ylim(36.5, 37.5) +
  theme_minimal()

# Convert to an interactive plot
interactive_density <- ggplotly(gg_density)

# Display the plot
interactive_density

Time-Series Patterns at Area 51

Extract Date and Time From the Dataset

# Make sure Date_time is a POSIXct date-time before pulling out its parts
sighting_time <- as.POSIXct(area51_sightings$Date_time, format = "%Y-%m-%d %H:%M:%S")
area51_sightings$Date_time <- sighting_time

# Derive Year, Month, Day and Hour as zero-padded character columns,
# one strftime-style format code per target column
time_parts <- c(Year = "%Y", Month = "%m", Day = "%d", Hour = "%H")
for (part in names(time_parts)) {
  area51_sightings[[part]] <- format(sighting_time, time_parts[[part]])
}

Analyze Sightings by Year

library(ggplot2)

# Group by year and count sightings
sightings_by_year <- area51_sightings %>%
  group_by(Year) %>%
  summarise(Sightings = n())

# Plot the trend over years. Year is stored as text, so it is converted to a
# number for a continuous x axis.
# `linewidth` sets the line thickness; using `size` for lines is deprecated
# since ggplot2 3.4.0 and emits a warning.
ggplot(sightings_by_year, aes(x = as.numeric(Year), y = Sightings)) +
  geom_line(color = "blue", linewidth = 1) +
  geom_point(size = 2) +
  labs(title = "UFO Sightings Near Area 51 Over the Years",
       x = "Year", y = "Number of Sightings") +
  theme_minimal()
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.

Analyze Sightings by Month

# Tally sightings for each calendar month
sightings_by_month <- summarise(
  group_by(area51_sightings, Month),
  Sightings = n()
)

# Bar chart of the monthly totals (bar height taken directly from Sightings)
month_plot <- ggplot(sightings_by_month, aes(x = Month, y = Sightings))
month_plot +
  geom_col(fill = "blue", alpha = 0.7) +
  ggtitle("UFO Sightings Near Area 51 by Month") +
  xlab("Month") +
  ylab("Number of Sightings") +
  theme_minimal()

Analyze Sightings by Hour

# Tally sightings for each hour of the day
sightings_by_hour <- summarise(
  group_by(area51_sightings, Hour),
  Sightings = n()
)

# Bar chart of the hourly totals; Hour is text, so convert it to a number
# for a continuous x axis
hour_plot <- ggplot(sightings_by_hour, aes(x = as.numeric(Hour), y = Sightings))
hour_plot +
  geom_col(fill = "blue", alpha = 0.7) +
  ggtitle("UFO Sightings Near Area 51 by Hour") +
  xlab("Hour of Day") +
  ylab("Number of Sightings") +
  theme_minimal()